#Step 1: Wake up packages

library(tidyverse)
library(lubridate)
library(httpuv)
library(leaflet)

#Step 2: Load the data

stops <- read_csv("https://datajournalism.tech/wp-content/uploads/2019/10/wichita.csv")
## Warning: Missing column names filled in: 'X1' [1]

#Step 3: Fix values

population_2016 <- tibble(subject_race=c("asian/pacific islander","black","hispanic","other/unknown","white"),num_people=c(19294,42485,65090,16686,245499))
center_lat <- 37.689811
center_lng <- -97.332332

#Step 4: Examine the data

str(stops)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 57750 obs. of  22 variables:
##  $ X1                     : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ raw_row_number         : chr  "923578" "923657" "912091" "923680" ...
##  $ date                   : Date, format: "2016-01-01" "2016-01-01" ...
##  $ time                   : 'hms' num  18:00:00 18:08:00 18:11:00 18:13:00 ...
##   ..- attr(*, "units")= chr "secs"
##  $ location               : chr  "N WEST ST, KS, 67205" "8000 W 13TH ST N, WICHITA, KS, 67212" "500 S LIMUEL ST, WICHITA, KS, 67235" "7600 W 21ST ST N, WICHITA, KS, 67205" ...
##  $ lat                    : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ lng                    : num  -97.4 -97.4 -97.5 -97.4 -97.4 ...
##  $ subject_age            : num  16 44 20 21 28 27 15 20 23 NA ...
##  $ subject_race           : chr  "white" "white" "white" "hispanic" ...
##  $ subject_sex            : chr  "female" "male" "male" "female" ...
##  $ type                   : chr  "vehicular" "vehicular" "vehicular" "vehicular" ...
##  $ disposition            : chr  "DISMISSED" "GUILTY (IVR)" "DISMISSED WITH PREJUDICE; DISMISSED WITH PREJUDICE" "GUILTY" ...
##  $ violation              : chr  "RUN STOP SIGN" "SPEED OVER LIMIT" "DUI; INATTENTIVE DRIVING" "SPEED OVER LIMIT" ...
##  $ citation_issued        : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ outcome                : chr  "citation" "citation" "citation" "citation" ...
##  $ posted_speed           : num  NA 40 NA 40 40 40 NA NA NA NA ...
##  $ vehicle_color          : chr  "BURGUNDY OR MAROON" "\"ALUMINUM, SILVER\"" "WHITE" "\"ALUMINUM, SILVER\"" ...
##  $ vehicle_make           : chr  "JEEP (1989 TO PRESENT)" "HYUNDAI" "HONDA" "TOYOTA" ...
##  $ vehicle_model          : chr  NA "TUCSON" NA NA ...
##  $ vehicle_year           : num  2008 NA NA NA NA ...
##  $ raw_defendant_race     : chr  "W" "W" "W" "W" ...
##  $ raw_defendant_ethnicity: chr  "N" "N" "N" "H" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   raw_row_number = col_character(),
##   ..   date = col_date(format = ""),
##   ..   time = col_time(format = ""),
##   ..   location = col_character(),
##   ..   lat = col_double(),
##   ..   lng = col_double(),
##   ..   subject_age = col_double(),
##   ..   subject_race = col_character(),
##   ..   subject_sex = col_character(),
##   ..   type = col_character(),
##   ..   disposition = col_character(),
##   ..   violation = col_character(),
##   ..   citation_issued = col_logical(),
##   ..   outcome = col_character(),
##   ..   posted_speed = col_double(),
##   ..   vehicle_color = col_character(),
##   ..   vehicle_make = col_character(),
##   ..   vehicle_model = col_character(),
##   ..   vehicle_year = col_double(),
##   ..   raw_defendant_race = col_character(),
##   ..   raw_defendant_ethnicity = col_character()
##   .. )
summary(stops)
##        X1        raw_row_number          date                time         
##  Min.   :    1   Length:57750       Min.   :2016-01-01   Length:57750     
##  1st Qu.:14438   Class :character   1st Qu.:2016-03-16   Class1:hms       
##  Median :28876   Mode  :character   Median :2016-05-29   Class2:difftime  
##  Mean   :28876                      Mean   :2016-06-10   Mode  :numeric   
##  3rd Qu.:43313                      3rd Qu.:2016-08-31                    
##  Max.   :57750                      Max.   :2016-12-31                    
##                                                                           
##    location              lat             lng           subject_age   
##  Length:57750       Min.   :37.47   Min.   :-101.36   Min.   :11.00  
##  Class :character   1st Qu.:37.67   1st Qu.: -97.37   1st Qu.:24.00  
##  Mode  :character   Median :37.69   Median : -97.34   Median :33.00  
##                     Mean   :37.69   Mean   : -97.33   Mean   :36.71  
##                     3rd Qu.:37.70   3rd Qu.: -97.28   3rd Qu.:48.00  
##                     Max.   :38.48   Max.   : -96.75   Max.   :99.00  
##                     NA's   :1167    NA's   :1167      NA's   :10128  
##  subject_race       subject_sex            type          
##  Length:57750       Length:57750       Length:57750      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  disposition         violation         citation_issued   outcome         
##  Length:57750       Length:57750       Mode:logical    Length:57750      
##  Class :character   Class :character   TRUE:57750      Class :character  
##  Mode  :character   Mode  :character                   Mode  :character  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##   posted_speed    vehicle_color      vehicle_make       vehicle_model     
##  Min.   : 20.00   Length:57750       Length:57750       Length:57750      
##  1st Qu.: 30.00   Class :character   Class :character   Class :character  
##  Median : 40.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 39.93                                                           
##  3rd Qu.: 40.00                                                           
##  Max.   :304.00                                                           
##  NA's   :35149                                                            
##   vehicle_year   raw_defendant_race raw_defendant_ethnicity
##  Min.   :1962    Length:57750       Length:57750           
##  1st Qu.:2001    Class :character   Class :character       
##  Median :2005    Mode  :character   Mode  :character       
##  Mean   :2005                                              
##  3rd Qu.:2009                                              
##  Max.   :2999                                              
##  NA's   :43236
colnames(stops)
##  [1] "X1"                      "raw_row_number"         
##  [3] "date"                    "time"                   
##  [5] "location"                "lat"                    
##  [7] "lng"                     "subject_age"            
##  [9] "subject_race"            "subject_sex"            
## [11] "type"                    "disposition"            
## [13] "violation"               "citation_issued"        
## [15] "outcome"                 "posted_speed"           
## [17] "vehicle_color"           "vehicle_make"           
## [19] "vehicle_model"           "vehicle_year"           
## [21] "raw_defendant_race"      "raw_defendant_ethnicity"
nrow(stops)
## [1] 57750

#Step 5: Count stops by race

stops %>% 
  count(subject_race)
## # A tibble: 5 x 2
##   subject_race               n
##   <chr>                  <int>
## 1 asian/pacific islander  1607
## 2 black                   8038
## 3 hispanic                6709
## 4 other/unknown           9335
## 5 white                  32061
stops %>%
  group_by(subject_race) %>%
  summarize(n=n(),prop=n/nrow(.))
## # A tibble: 5 x 3
##   subject_race               n   prop
##   <chr>                  <int>  <dbl>
## 1 asian/pacific islander  1607 0.0278
## 2 black                   8038 0.139 
## 3 hispanic                6709 0.116 
## 4 other/unknown           9335 0.162 
## 5 white                  32061 0.555

#Step 6: Compare to total population

population_2016 %>%
  mutate(prop=num_people/sum(num_people))
## # A tibble: 5 x 3
##   subject_race           num_people   prop
##   <chr>                       <dbl>  <dbl>
## 1 asian/pacific islander      19294 0.0496
## 2 black                       42485 0.109 
## 3 hispanic                    65090 0.167 
## 4 other/unknown               16686 0.0429
## 5 white                      245499 0.631

#Step 7: Join data

race <- stops %>%
  count(subject_race)%>%
  left_join(population_2016,by="subject_race")%>%
  mutate(stop_rate=n/num_people)

#Step 8: Data visualization, bar chart

bar <- ggplot(race,aes(x=reorder(subject_race,stop_rate),y=stop_rate))+
  geom_bar(stat="identity", fill="blue")+
  geom_hline(yintercept=0)+
  labs(title="Stopped Drivers by Race",subtitle="Wichita, KS 2016")+
  coord_flip()
options(scipen=10000)

bar

#Step 9: Data visualization, map

race <- colorFactor(c("pink", "black", "yellow", "red", "blue"),
                    domain=c("white", "black", "asian/pacific islander", "hispanic", "other/unknown"),
                    ordered=TRUE)
ac <- stops %>% drop_na(lat, lng)

map <- leaflet(ac) %>%
  addProviderTiles(providers$CartoDB) %>% 
  setView(lng= -97.318689, lat= 37.683568, zoom=16) %>% 
  addCircleMarkers(~lng,
                   ~lat,
                   popup=paste("This is a/an", ac$subject_race, "and", ac$subject_sex, "driver."),
                   weight= 3, 
                   radius=4, 
                   color=~race(subject_race),
                   stroke=F,
                   fillOpacity = 1)

map